import os
import json

# Camera views that must exist as "<subtask_id>_<view>.png" for every subtask.
_CAMERA_VIEWS = ("front", "left", "back", "right")


def _split_decision(decision):
    """Split a decision string into ``(action, model_choice)``.

    A decision containing ``"End"`` marks the terminal step and maps to
    ``("[End]", "None")``. Any other decision must look like
    ``"<action> (<model>)"``; a ``ValueError`` is raised otherwise.
    """
    if "End" in decision:
        return "[End]", "None"
    # rsplit with maxsplit=1 keeps parentheses inside the action text intact.
    action, model_choice = decision.strip(')').rsplit(' (', 1)
    return action, model_choice


def data_process(split_dirs, split="train"):
    """Build one conversation sample per subtask from every ``*info.txt`` file.

    ``split_dirs`` is scanned for sub-directories; inside each one, every file
    whose name ends with ``info.txt`` is parsed. The file is split on blank
    lines: the first chunk is the task description, and each later chunk
    describes one subtask using four lines, read by position:

    0. ``<subtask id>: <decision>``
    1. analysis text (becomes the first line of the answer)
    2. feedback line (inserted into the *next* subtask's prompt)
    3. inventory line (inserted into the *next* subtask's prompt)

    A step is recorded as ``success`` in the running history when its feedback
    line contains ``"None"``, and as ``fail`` otherwise.

    Args:
        split_dirs: Directory holding one sub-directory per task.
        split: Unused; kept so existing callers keep working.

    Returns:
        A list of dicts with keys ``"id"``, ``"image"`` (four image paths) and
        ``"conversations"`` (a human prompt followed by the expected answer).

    Raises:
        FileNotFoundError: If one of the four camera images of a subtask is
            missing.
        ValueError: If a subtask header or decision is malformed.
        IndexError: If a subtask chunk has fewer than four lines.
    """
    subtask_list = []
    # Sorted so the output order does not depend on the filesystem.
    for dir_name in sorted(os.listdir(split_dirs)):
        task_dir = os.path.join(split_dirs, dir_name)
        if not os.path.isdir(task_dir):
            # Stray files next to the task directories are not tasks.
            continue

        for file_name in sorted(os.listdir(task_dir)):
            if not file_name.endswith('info.txt'):
                continue

            task_path = os.path.join(task_dir, file_name)
            with open(task_path, encoding="utf-8") as info_file:
                chunks = info_file.read().split("\n\n")

            task = chunks[0]
            # Derived from the directory and file names only, so dots elsewhere
            # in the path (e.g. a "./data" root) cannot truncate the id.
            task_id = f"{dir_name}_{file_name.split('.')[0]}"
            historical = ""
            # Counts the subtasks actually emitted for this file. Using it
            # instead of the chunk index means an empty chunk before the first
            # subtask cannot make the prompt read feedback/inventory that are
            # undefined or left over from a previous file.
            step = 0

            for chunk in chunks[1:]:
                lines = chunk.strip().split("\n")
                if lines[0] == '':
                    continue

                subtask_id, decision = lines[0].split(': ', 1)
                subtask_id = subtask_id.lower()
                analysis = lines[1]
                action, model_choice = _split_decision(decision)

                image_paths = [
                    os.path.join(task_dir, f"{subtask_id}_{view}.png")
                    for view in _CAMERA_VIEWS
                ]
                for path in image_paths:
                    if not os.path.exists(path):
                        raise FileNotFoundError(f"File does NOT exist: {path}")

                if step == 0:
                    instruction = f"{task}\nInventory: None\nHistorical Execution: None\nFeedback: None\nNow, please output Analysis, Subtask and Model, according to the instruction above."
                else:
                    # feedback/inventory were set by the previous subtask.
                    instruction = f"{task}\n{inventory}\nHistorical Execution:{historical}\n{feedback}\nNow, please output Analysis, Subtask and Model, according to the instruction above."
                answer = f"{analysis}\nSubtask: {action}\nModel: {model_choice}"

                # This step's outcome only becomes visible to the next prompt.
                feedback = lines[2]
                inventory = lines[3]
                step += 1
                outcome = "success" if "None" in feedback else "fail"
                historical += f"({step}){decision} ({outcome})\n"

                subtask_list.append({
                    "id": task_id + '_' + subtask_id,
                    "image": image_paths,
                    "conversations": [
                        {"from": "human", "value": instruction},
                        {"from": "gpt", "value": answer},
                    ],
                })

    return subtask_list

# Root directory of the dataset. data_process() lists this directory, so the
# empty placeholder must be replaced with a real path before running.
root_path = ""
# Parse every task under root_path into per-subtask conversation samples.
subtask_list = data_process(root_path)
# Bare expression: its value is discarded when this file runs as a script.
# NOTE(review): presumably left over from an interactive session -- confirm.
len(subtask_list)

def data_sift(subtask_list):
    """Split parsed subtasks into SFT samples and DPO preference pairs.

    Whether a step succeeded is read from the *next* sample's prompt: if that
    prompt contains ``"Feedback: None"``, the step before it is kept as an SFT
    sample. When such a kept step follows a run of steps that did not
    succeed, one DPO pair is emitted: the prompt and answer of the step just
    before the kept one form ``prompt``/``rejected``, and the kept step's
    answer is ``chosen``. Answers in DPO pairs have their first line (the
    analysis) removed.

    The last sample has no successor to judge it and is always kept as SFT.

    Args:
        subtask_list: Samples shaped like the output of ``data_process``; each
            has ``["conversations"][0]["value"]`` (prompt) and
            ``["conversations"][1]["value"]`` (answer).

    Returns:
        ``(sft_data, dpo_data)``. Both are empty for an empty input.
    """
    sft_data = []
    dpo_data = []
    # True while the most recently inspected prompt carried real feedback,
    # i.e. the step before it did not succeed.
    pending_failure = False

    for i in range(1, len(subtask_list)):
        prompt = subtask_list[i]["conversations"][0]["value"]
        if "Feedback: None" not in prompt:
            pending_failure = True
            continue

        succeeded = subtask_list[i - 1]
        sft_data.append(succeeded)
        if pending_failure:
            # pending_failure is only set at an index >= 1, so i - 2 >= 0.
            failed = subtask_list[i - 2]
            dpo_data.append({
                "prompt": failed["conversations"][0]["value"],
                # partition('\n')[2] drops the leading analysis line.
                "chosen": succeeded["conversations"][1]["value"].partition('\n')[2],
                "rejected": failed["conversations"][1]["value"].partition('\n')[2],
            })
            pending_failure = False

    # Guarded so an empty input returns empty lists instead of raising.
    if subtask_list:
        sft_data.append(subtask_list[-1])

    print(f"sft size: {len(sft_data)}\ndpo size: {len(dpo_data)}")
    return sft_data, dpo_data

# Split the parsed samples into an SFT list and a list of DPO preference pairs.
sft_data, dpo_data = data_sift(subtask_list)


# Ordered (old, new) substitutions; only the first entry whose ``old`` text is
# present gets applied, so the order is part of the behavior.
_MODEL_SWAPS = (
    ("NoMaD", "PixNav"),
    ("PixNav", "NoMaD"),
    ("octo", "RT-1-X"),
    ("RT-1-X", "octo"),
)
_ACTION_SWAPS = (
    ("Pick", "Fetch"),
    ("Put", "Place"),
    ("Go to", "Move"),
    ("Open", "Pull"),
    ("Close", "Push"),
)


def _swap_first_match(text, swaps):
    """Apply the first matching ``(old, new)`` replacement, or return None."""
    for old, new in swaps:
        if old in text:
            return text.replace(old, new)
    return None


def dpo_augment(sft_data, dpo_data):
    """Append synthetic rejected answers for every non-final SFT sample.

    For each sample whose answer does not contain ``"End"``, up to three DPO
    pairs are appended to ``dpo_data``, in this order:

    1. the chosen answer with its model name swapped for another one,
    2. the answer of the next SFT sample (a step taken out of order),
    3. the chosen answer with its action verb replaced.

    The chosen text is the answer without its first line (the analysis).
    A candidate is skipped when it cannot be built (no known model or verb,
    no next sample) or when it equals the chosen text, so no pair has a
    ``None`` or identical ``rejected`` value.

    Args:
        sft_data: Samples with ``["conversations"][0]["value"]`` (prompt) and
            ``["conversations"][1]["value"]`` (answer).
        dpo_data: List of existing pairs; it is extended in place.

    Returns:
        The same ``dpo_data`` list, after extension.
    """
    for i, sample in enumerate(sft_data):
        prompt = sample["conversations"][0]["value"]
        answer = sample["conversations"][1]["value"]
        if "End" in answer:
            continue
        chosen = answer.partition('\n')[2]

        model_swapped = _swap_first_match(chosen, _MODEL_SWAPS)
        # The last sample has no successor to borrow an answer from.
        next_answer = None
        if i + 1 < len(sft_data):
            next_answer = sft_data[i + 1]["conversations"][1]["value"].partition('\n')[2]
        action_swapped = _swap_first_match(chosen, _ACTION_SWAPS)

        for rejected in (model_swapped, next_answer, action_swapped):
            if rejected is None or rejected == chosen:
                continue
            dpo_data.append({"prompt": prompt, "chosen": chosen, "rejected": rejected})

    print(len(dpo_data))
    return dpo_data
        
# Extend dpo_data with the extra rejected answers generated from sft_data.
dpo_data = dpo_augment(sft_data, dpo_data)



